Contents

library(ggplot2)
library(patchwork)
library(ggnewscale)
library(SpatialData)
library(SpatialData.plot)
library(SingleCellExperiment)

1 Introduction

The SpatialData package contains a set of reader and plotting functions for spatial omics data stored as SpatialData .zarr files that follow OME-NGFF specs.

Each SpatialData object is composed of five layers: images, labels, shapes, points, and tables. Each layer may contain an arbitrary number of elements.

Images and labels are represented as ZarrArrays (Rarr). Points and shapes are represented as arrow objects linked to an on-disk .parquet file. As such, all data are represented out of memory.

Element annotation as well as cross-layer summarizations (e.g., count matrices) are represented as SingleCellExperiment as tables.

x <- file.path("extdata", "blobs.zarr")
x <- system.file(x, package="SpatialData")
(x <- readSpatialData(x, anndataR=FALSE))
## class: SpatialData
## - images(2):
##   - blobs_image (3,64,64)
##   - blobs_multiscale_image (3,64,64)
## - labels(2):
##   - blobs_labels (64,64)
##   - blobs_multiscale_labels (64,64)
## - points(1):
##   - blobs_points (200)
## - shapes(3):
##   - blobs_circles (5,circle)
##   - blobs_multipolygons (2,polygon)
##   - blobs_polygons (5,polygon)
## - tables(1):
##   - table (3,10)
## coordinate systems:
## - global(8): blobs_image blobs_multiscale_image ... blobs_polygons
##   blobs_points
## - scale(1): blobs_labels
## - translation(1): blobs_labels
## - affine(1): blobs_labels
## - sequence(1): blobs_labels

2 Visualization

2.0.1 Images

Image/LabelArrays are linked to potentially multiscale .zarr stores. Their show method includes the scales available for a given element:

image(x, "blobs_image")
## class: ImageArray  
## Scales (1): (3,64,64)
image(x, "blobs_multiscale_image")
## class: ImageArray (MultiScale) 
## Scales (3): (3,64,64) (3,32,32) (3,16,16)

Internally, multiscale ImageArrays are stored as a list of ZarrArray, e.g.:

i <- image(x, "blobs_multiscale_image")
vapply(i@data, dim, numeric(3))
##      [,1] [,2] [,3]
## [1,]    3    3    3
## [2,]   64   32   16
## [3,]   64   32   16

To retrieve a specific scale’s ZarrArray, we can use data(., k), where k specifies the target scale. This also works for plotting:

wrap_plots(nrow=1, lapply(seq(3), \(.) 
    plotSpatialData() + plotImage(x, i=2, k=.)))

2.0.2 Labels

i <- "blobs_labels"
t <- getTable(x, i)
t$id <- sample(letters, ncol(t))
table(x) <- t

p <- plotSpatialData()
pal_d <- hcl.colors(10, "Spectral")
pal_c <- hcl.colors(9, "Inferno")[-9]

a <- p + plotLabel(x, i) # simple binary image
b <- p + plotLabel(x, i, "id", pal=pal_d) # 'colData'
c <- p + plotLabel(x, i, "channel_1_sum", pal=pal_c) + 
    theme(legend.key.width=unit(1, "lines")) # 'assay'

(a | b | c) + 
    plot_layout(guides="collect") & 
    theme(legend.position="bottom")

2.0.3 Points

i <- "blobs_points"
p <- plotSpatialData()
# mock up a 'table'
f <- list(
  numbers=\(n) runif(n),
  letters=\(n) sample(letters, n, TRUE))
y <- setTable(x, i, f)
# demo. viz. capabilities
a <- p + plotPoint(y, i) # simple dots
b <- p + plotPoint(y, i, "letters") # discrete coloring
c <- p + plotPoint(y, i, "numbers") # continuous coloring
a | b | c

2.0.4 Shapes

p <- plotSpatialData()
a <- p +
  ggtitle("polygons") +
  plotShape(x, "blobs_polygons")
b <- p +
  ggtitle("multipolygons") +
  plotShape(x, "blobs_multipolygons")
c <- p +
  ggtitle("circles") +
  plotShape(x, "blobs_circles")
wrap_plots(a, b, c)

2.0.5 Layering

p <- plotSpatialData()
# joint
all <- p +
    plotImage(x) +
    plotLabel(x, a=1/3) +
    plotShape(x, 1) +
    plotShape(x, 3) +
    new_scale_color() +
    plotPoint(x, c="genes") +
    ggtitle("layered")
# split
one <- list(
    p + plotImage(x) + ggtitle("image"),
    p + plotLabel(x) + ggtitle("labels"),
    p + plotShape(x, 1) + ggtitle("circles"),
    p + plotShape(x, 3) + ggtitle("polygons"),
    p + plotPoint(x, c="genes") + ggtitle("points"))
wrap_plots(c(list(all), one), nrow=2)

3 Examples

3.1 MERFISH

In this example data, we do not have a label for the shape polygons. Such labels could be morphological regions annotated by pathologists.

dir.create(td <- tempfile())
pa <- unzip_spd_demo(
    zipname="merfish.zarr.zip", 
    dest=td, source="biocOSN")
(x <- readSpatialData(pa, anndataR=FALSE))
## class: SpatialData
## - images(1):
##   - rasterized (1,522,575)
## - labels(0):
## - points(1):
##   - single_molecule (3714642)
## - shapes(2):
##   - anatomical (6,polygon)
##   - cells (2389,circle)
## - tables(1):
##   - table (268,2389)
## coordinate systems:
## - global(4): rasterized anatomical cells single_molecule

There are only 2389 cells, but 3,714,642 molecules, so that we downsample a random subset of 2,000 for visualization:

# downsample 2,000 points
n <- length(p <- point(x))
q <- p[sample(n, 1e3)]
(point(x, "2k") <- q)
## class: PointFrame
## count: 1000 
## data(3): x y cell_type
# layered visualization
plotSpatialData() +
    plotImage(x) +
    plotPoint(x, i="2k", c="cell_type", s=0.2) +
    new_scale_color() +
    plotShape(x, i="anatomical") +
    scale_color_manual(values=hcl.colors(6, "Spectral")) 

# bounding-box query
qu <- list(xmin=1800, xmax=2400, ymin=5000, ymax=5400)
bb <- geom_rect(do.call(aes, qu), data.frame(qu), col="yellow", fill=NA)
y <- SpatialData(images=list("."=do.call(query, c(list(x=image(x)), qu))))
plotSpatialData() + plotImage(x) + bb | plotSpatialData() + plotImage(y)

3.2 VisiumHD

Mouse intestine, 1GB; 4 image resolutions and 3 shapes at 2, 8, and 16 \(\mu\)m.

dir.create(td <- tempfile())
pa <- unzip_spd_demo(
    zipname="visium_hd_3.0.0_io.zip", 
    dest=td, source="biocOSN")
(x <- readSpatialData(pa, images=4, shapes=3, tables=FALSE))
## class: SpatialData
## - images(1):
##   - Visium_HD_Mouse_Small_Intestine_lowres_image (3,558,600)
## - labels(0):
## - points(0):
## - shapes(1):
##   - Visium_HD_Mouse_Small_Intestine_square_016um (91033,circle)
## - tables(0):
## coordinate systems:
## - downscaled_lowres(2): Visium_HD_Mouse_Small_Intestine_lowres_image
##   Visium_HD_Mouse_Small_Intestine_square_016um
## - global(1): Visium_HD_Mouse_Small_Intestine_square_016um
## - downscaled_hires(1): Visium_HD_Mouse_Small_Intestine_square_016um
qu <- list(xmin=100, xmax=300, ymin=200, ymax=400)
bb <- geom_rect(do.call(aes, qu), data.frame(qu), col="black", fill=NA)
y <- SpatialData(images=list("."=do.call(query, c(list(x=image(x)), qu))))
plotSpatialData() + plotImage(x) + bb | plotSpatialData() + plotImage(y)

3.3 MibiTOF

Colorectal carcinoma, 25 MB; no shapes, no points.

dir.create(td <- tempfile())
pa <- unzip_spd_demo(
    zipname="mibitof.zip", 
    dest=td, source="biocOSN")
(x <- readSpatialData(pa, anndataR=FALSE))
## class: SpatialData
## - images(3):
##   - point16_image (3,1024,1024)
##   - point23_image (3,1024,1024)
##   - point8_image (3,1024,1024)
## - labels(3):
##   - point16_labels (1024,1024)
##   - point23_labels (1024,1024)
##   - point8_labels (1024,1024)
## - points(0):
## - shapes(0):
## - tables(1):
##   - table (36,3309)
## coordinate systems:
## - point16(2): point16_image point16_labels
## - point23(2): point23_image point23_labels
## - point8(2): point8_image point8_labels
pal <- hcl.colors(8, "Spectral")
wrap_plots(nrow=1, lapply(seq(3), \(.)
    plotSpatialData() + plotImage(x, .) + 
    plotLabel(x, ., "Cluster", pal=pal))) +
    plot_layout(guides="collect")

3.4 CyCIF (MCMICRO)

Small lung adenocarcinoma, 250 MB; 1 image, 2 labels, 2 tables.

dir.create(td <- tempfile())
pa <- unzip_spd_demo(
    zipname="mcmicro_io.zip", 
    dest=td, source="biocOSN")
(x <- readSpatialData(pa, anndataR=FALSE))
## class: SpatialData
## - images(1):
##   - exemplar-001_image (12,3139,2511)
## - labels(2):
##   - exemplar-001_cell (3139,2511)
##   - exemplar-001_nuclei (3139,2511)
## - points(0):
## - shapes(0):
## - tables(2):
##   - exemplar-001--ilastik_cell (12,11607)
##   - exemplar-001--unmicst_cell (12,11170)
## coordinate systems:
## - global(3): exemplar-001_image exemplar-001_cell exemplar-001_nuclei

Getting channel names for the image:

channels(image(x))
##  [1] "DNA_6" "ELANE" "CD57"  "CD45"  "DNA_7" "CD11B" "SMA"   "CD16"  "DNA_8"
## [10] "ECAD"  "FOXP3" "NCAM"

Plotting with multiple image channels:

plotSpatialData() + plotImage(x,
    ch=c("DNA_6", "CD45", "CD57"), 
    c=c("blue", "cyan", "yellow"))

3.5 IMC (Steinbock)

4 different cancers (SCCHN, BCC, NSCLC, CRC), 820 MB; 14 images, 14 labels, 1 table.

dir.create(td <- tempfile())
pa <- unzip_spd_demo(
    zipname="steinbock_io.zip", 
    dest=td, source="biocOSN")
x <- readSpatialData(pa, anndataR=FALSE)

Plotting with multiple image channels.

plotSpatialData() + plotImage(x, 
    i="Patient3_003_image",
    ch=c(6, 22, 39),
    c=c("blue", "cyan", "yellow"))

4 Masking

Back to blobs…

x <- file.path("extdata", "blobs.zarr")
x <- system.file(x, package="SpatialData")
x <- readSpatialData(x, tables=FALSE)
i <- "blobs_circles"
x <- mask(x, "blobs_points", i)
(t <- getTable(x, i))
## class: SingleCellExperiment 
## dim: 2 5 
## metadata(0):
## assays(1): counts
## rownames(2): gene_a gene_b
## rowData names(0):
## colnames(5): 1 2 3 4 5
## colData names(0):
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
p <- plotSpatialData() + 
    plotPoint(x, c="genes") +
    scale_color_manual(values=c("tomato", "cornflowerblue")) +
    new_scale_color()
lapply(names(c <- c(a="red", b="blue")), \(.)
    p + plotShape(x, i, c=paste0("gene_", .)) + 
        scale_color_gradient2(
            low="grey", high=c[.],
            limits=c(0, 8), n.breaks=5)) |>
    wrap_plots() + plot_layout(guides="collect")

# compute channel-wise means
i <- "blobs_labels"
x <- mask(x, "blobs_image", i, fun=mean)
(t <- getTable(x, i))
## class: SingleCellExperiment 
## dim: 3 10 
## metadata(0):
## assays(1): counts
## rownames(3): 1 2 3
## rowData names(0):
## colnames(10): 12 11 ... 16 4
## colData names(0):
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):
# visualize side-by-side
ps <- lapply(paste(seq_len(3)), \(.) 
    plotSpatialData() + plotLabel(x, i, .) + 
    ggtitle(paste("channel", ., "sum"))) 
wrap_plots(ps, nrow=1) & theme(
    legend.position="bottom", 
    legend.title=element_blank(),
    legend.key.width=unit(1, "lines"),
    legend.key.height=unit(0.5, "lines"))

5 Session info

## R version 4.4.1 Patched (2024-07-08 r86893)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sonoma 14.2.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Europe/Madrid
## tzcode source: internal
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] SingleCellExperiment_1.28.0 SummarizedExperiment_1.36.0
##  [3] Biobase_2.66.0              GenomicRanges_1.58.0       
##  [5] GenomeInfoDb_1.42.0         IRanges_2.40.0             
##  [7] S4Vectors_0.44.0            BiocGenerics_0.52.0        
##  [9] MatrixGenerics_1.18.0       matrixStats_1.4.1          
## [11] SpatialData.plot_0.99.1     SpatialData_0.99.20        
## [13] ggnewscale_0.5.0            patchwork_1.3.0            
## [15] ggplot2_3.5.1               BiocStyle_2.34.0           
## 
## loaded via a namespace (and not attached):
##   [1] DBI_1.2.3               RBGL_1.82.0             rlang_1.1.4            
##   [4] magrittr_2.0.3          Rarr_1.6.0              e1071_1.7-16           
##   [7] compiler_4.4.1          RSQLite_2.3.8           dir.expiry_1.14.0      
##  [10] paws.storage_0.7.0      png_0.1-8               vctrs_0.6.5            
##  [13] stringr_1.5.1           wk_0.9.4                pkgconfig_2.0.3        
##  [16] crayon_1.5.3            fastmap_1.2.0           magick_2.8.5           
##  [19] dbplyr_2.5.0            XVector_0.46.0          labeling_0.4.3         
##  [22] paws.common_0.7.7       utf8_1.2.4              rmarkdown_2.29         
##  [25] graph_1.84.0            UCSC.utils_1.2.0        tinytex_0.54           
##  [28] purrr_1.0.2             bit_4.5.0               xfun_0.49              
##  [31] zlibbioc_1.52.0         cachem_1.1.0            jsonlite_1.8.9         
##  [34] blob_1.2.4              DelayedArray_0.32.0     tweenr_2.0.3           
##  [37] parallel_4.4.1          R6_2.5.1                bslib_0.8.0            
##  [40] stringi_1.8.4           reticulate_1.40.0       jquerylib_0.1.4        
##  [43] Rcpp_1.0.13-1           bookdown_0.41           assertthat_0.2.1       
##  [46] knitr_1.49              R.utils_2.12.3          Matrix_1.7-1           
##  [49] tidyselect_1.2.1        rstudioapi_0.17.1       abind_1.4-8            
##  [52] yaml_2.3.10             zellkonverter_1.16.0    curl_6.0.1             
##  [55] lattice_0.22-6          tibble_3.2.1            basilisk.utils_1.18.0  
##  [58] withr_3.0.2             evaluate_1.0.1          sf_1.0-19              
##  [61] polyclip_1.10-7         units_0.8-5             proxy_0.4-27           
##  [64] BiocFileCache_2.14.0    pillar_1.9.0            BiocManager_1.30.25    
##  [67] filelock_1.0.3          KernSmooth_2.23-24      generics_0.1.3         
##  [70] nanoarrow_0.6.0         munsell_0.5.1           scales_1.3.0           
##  [73] class_7.3-22            glue_1.8.0              tools_4.4.1            
##  [76] grid_4.4.1              colorspace_2.1-1        GenomeInfoDbData_1.2.13
##  [79] basilisk_1.18.0         ggforce_0.4.2           cli_3.6.3              
##  [82] fansi_1.0.6             S4Arrays_1.6.0          arrow_17.0.0.1         
##  [85] dplyr_1.1.4             geoarrow_0.2.1          gtable_0.3.6           
##  [88] R.methodsS3_1.8.2       sass_0.4.9              digest_0.6.37          
##  [91] classInt_0.4-10         SparseArray_1.6.0       farver_2.1.2           
##  [94] memoise_2.0.1           htmltools_0.5.8.1       R.oo_1.27.0            
##  [97] lifecycle_1.0.4         httr_1.4.7              MASS_7.3-61            
## [100] bit64_4.5.2